Load packages

## add 'developer' to packages to be installed from github
x <- c("devtools", "maRce10/warbleR", "readxl", "ranger", "caret", "e1071", "pbapply", "viridis", "ggplot2", "kableExtra", "rlang", "Sim.DiffProc", "soundgen", "markovchain", "igraph", "TraMineR", "spgs")

aa <- lapply(x, function(y) {
  
  # get pakage name
  pkg <- strsplit(y, "/")[[1]]
  pkg <- pkg[length(pkg)]
  
  # check if installed, if not then install 
  if (!pkg %in% installed.packages()[,"Package"])  {

      if (grepl("/", y))  devtools::install_github(y, force = TRUE) else
    install.packages(y) 
    }

  # load package
  try(require(pkg, character.only = T), silent = T)
})

Functions and global parameters

warbleR_options(wl = 1100, parallel = parallel::detectCores() - 1, bp = "frange", fast = TRUE, threshold = 15, ovlp = 20)

opts_knit$set(root.dir = "..")

# set evaluation false
opts_chunk$set( fig.width = 6, fig.height = 3, eval = FALSE, warning = FALSE, message = FALSE, tidy = TRUE)

# number of trees in Random Forest models
num.trees <- 2000

# replicates in Random Forest replication
reps <- 50

# sensitivity cutoff
cutoff <- 0.86

# function to calculate classification random forest models with balanced sample sizes across categories
balanced_rf <- function(X, num.trees = 1000, random = FALSE, seed = 506){
  
    # get smallest n across individuals
    min.n <- min(table(X$indiv)) 
  
    # use seed
    set.seed(seed)
    
    
    # randomly get rows for equal n across indivs
    sel_rows <-
    sapply(unique(X$indiv), function(x)
      sample(rownames(X)[X$indiv == x], min.n, replace = FALSE))
  
  # subset to those rows  
  X <- X[c(sel_rows), ]
  
  # convert to factor
  if (random){ 
    
    # use seed
    set.seed(seed)

    X$indiv <- sample(X$indiv)
  }
  
   # make it a factor for ranger to work 
  X$indiv <- as.factor(X$indiv)
  
  # run RF model spectral and cepstral parameters
  rfm <-
    ranger(
      indiv ~ .,
      data = X[, !names(X) %in% c("sound.files", "selec")],
      num.trees = num.trees,
      importance = "impurity",
      probability = TRUE,
      seed = seed
    )
  
  # get predicted individual from probs
  pred_indiv <- apply(rfm$predictions, 1, function(x) colnames(rfm$predictions) [which.max(x)])
  
  rfm$predictions <- data.frame(rfm$predictions, indiv = X$indiv, pred_indiv, sound.files = X$sound.files)
  
  # remove X from start of names 
  names(rfm$predictions) <- gsub("^X", "", names(rfm$predictions))
  
   return(rfm)
  }


# function to calculate sensitivities at increasing RF class probabilities

sensitivity_fun <- function(X, parameters, thresholds = seq(0,1, by = 0.01)){

# get sensitivities for each group at very threshold
sensitiv_l <- lapply(X, function(x){

  # extract prediction data.frame
  Y <- x$aggregated_predictions
  Y$max <- apply(Y[, sapply(Y, is.numeric)], 1, max)
  
  # get sensitivity at different thresholds
  sensi_l <- lapply(thresholds, function(y) data.frame(sensitivity = sum(Y$pred_indiv[Y$max >= y] == Y$actual_indiv[Y$max >= y])/ sum(Y$max >= y), n = sum(Y$max >= y) / nrow(Y))) 
  
  sensi <- do.call(rbind, sensi_l)
  
  # add metadata
  sensi$group <- x$group 
  sensi$n_indiv <- x$n_indiv
  sensi$min_n <- x$min_n
  sensi$n_calls <- nrow(Y) * sensi$n

  return(sensi)
  })

# put in a data frame
sensitivities <- as.data.frame(lapply(sensitiv_l, "[[", which(names(sensitiv_l[[1]]) == "sensitivity")))

# get minimum sensitivity at each probabilities
sensitivities$min.sensitivity <- apply(sensitivities, 1, min, na.rm = TRUE)

# get minimum sensitivity at each probabilities
sensitivities$mean.sensitivity <- apply(sensitivities, 1, mean, na.rm = TRUE)

# add thresholds to data frame
sensitivities$thresholds <- thresholds


# put in a data frame
sensitivities$n_calls <- rowSums(as.data.frame(lapply(sensitiv_l, "[[", which(names(sensitiv_l[[1]]) == "n_calls"))), na.rm = TRUE)

sensitivities$n_calls_prop <- sensitivities$n_calls / max(sensitivities$n_calls)

sensitivities <- sensitivities[!is.infinite(sensitivities$mean.sensitivity), ]

sensitivities$parameters <- parameters    
return(sensitivities)  
} 

Read detections and prepare data

# read ext sel tab calls
clls <- readRDS("./output/CALLS_fixed_detections_inquiry_calls.RDS")

# read metadata
metadat <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx", 
    sheet = "Experimento video coor vuelo"))
# nrow(metadat)

# remove video calibration metadat <- metadat[metadat$`tipo de video` !=
# 'calibracion de video', ]

metadat <- metadat[!is.na(metadat$Audio), ]

# get audio file name from sound files
clls$Audio <- sapply(clls$sound.files, function(x) {

    # split by _
    spl <- strsplit(x, split = "_")[[1]]

    # take 3rd
    spl <- spl[grep(".wav$", spl)]

    # remove .wav
    spl <- gsub(".wav", "", spl)

    # make it anumber
    spl <- as.numeric(spl)

    return(spl)
})

# anyNA(clls$Audio)

# nrow(clls)

clls <- clls[clls$Audio %in% metadat$Audio, ]

# add metadata to ext sel table
clls2 <- merge(clls, metadat[!duplicated(metadat$Audio), c("Audio", "tipo de video", 
    "Experimento", "Grupo", "Individuo")])

clls <- fix_extended_selection_table(X = clls2, Y = clls)

# exclude those with obstacles
clls <- clls[clls$Experimento != "Búsqueda refugio con obstaculos", ]


clls$Individuo[clls$Experimento != "vuelo solo"] <- NA

Measure acoustic parameters

# loop to measure acoustic parameters on each group
acous_param_l <- lapply(unique(clls$Grupo), function(x) {

    print(x)
    print(which(unique(clls$Grupo) == x)/length(unique(clls$Grupo)))

    # get 1 group data
    grp_test <- clls[clls$Grupo == x, ]

    # keep only solo flights
    grp_test <- grp_test[grp_test$Experimento != "vuelo grupal/enmascarando busqueda", 
        ]

    # remove low SNR calls
    grp_test <- sig2noise(grp_test, mar = 0.025, pb = FALSE)
    grp_test <- grp_test[grp_test$SNR > 1, ]
    grp_test$SNR <- NULL

    tb <- aggregate(Audio ~ Grupo + Experimento, data = grp_test, length)

    # if group flight then go, otherwise return NA
    if (any(grepl("grupal", tb$Experimento)) & length(unique(grp_test$Individuo[grp_test$Experimento == 
        "vuelo solo"])) > 1) {

        ######## measure acoustic structure ###### measure spectrographic parameters
        sp <- specan(grp_test, pb = FALSE, harmonicity = TRUE)

        # remove time parameters
        sp <- sp[, grep("time\\.", names(sp), invert = TRUE)]

        # check if there are NAs anyNA(sp)

        # measure cepstral coeffs
        cc <- mfcc_stats(grp_test, pb = FALSE)[, -c(1, 2)]

        # spectrographic cross correlation
        spxc <- xcorr(grp_test, pb = FALSE)

        # MDS
        spxc <- cmdscale(1 - spxc, k = 10, list. = TRUE)

        spxc_mds <- spxc$points

        colnames(spxc_mds) <- paste0("spxcMDS", 1:ncol(spxc_mds))

        # mfcc cross correlation
        mfccxc <- xcorr(grp_test, pb = FALSE, type = "mfcc")

        # MDS
        mfccxc <- cmdscale(1 - mfccxc, k = 10, list. = TRUE)

        mfxc_mds <- mfccxc$points

        colnames(mfxc_mds) <- paste0("mfxcMDS", 1:ncol(mfxc_mds))

        # dynamic time warping
        dtw.dists <- df_DTW(grp_test, img = FALSE, pb = FALSE, threshold.time = 1)

        # MDS
        dtw_mds <- cmdscale(dtw.dists, k = 10, list. = TRUE)$points

        # fix colnames
        colnames(dtw_mds) <- paste0("dtwMDS", 1:ncol(dtw_mds))

        # put parameters in a list
        acous_param_l <- list(sp, cc, dtw_mds, spxc_mds, mfxc_mds)

        # remove null ones
        acous_param_l <- acous_param_l[!sapply(acous_param_l, is.null)]

        # bind all acoustic structure parameters together
        all_acous_param <- do.call(cbind, acous_param_l)

        # scale
        all_acous_param[, -c(1, 2)] <- scale(all_acous_param[, -c(1, 2)])

        # add individual and experiment
        all_acous_param$indiv <- grp_test$Individuo
        all_acous_param$experiment <- grp_test$Experimento

        # remove bottom and top freq
        all_acous_param$top.freq <- all_acous_param$bottom.freq <- NULL

        return(all_acous_param)
    } else return(NULL)
})

names(acous_param_l) <- unique(clls$Grupo)


# remove those that did not have group flights
acous_param_l <- acous_param_l[!sapply(acous_param_l, is.null)]

# remove columns with NAs across all groups get names of columns that have any NA
# in at least 1 group (list element)
na_colnms <- unique(unlist(lapply(acous_param_l, function(x) names(x)[sapply(x, anyNA)])))

# keep indiv column
na_colnms <- na_colnms[na_colnms != "indiv"]

# remove NA columns for all groups
acous_param_l <- lapply(acous_param_l, function(x) x[, !names(x) %in% na_colnms])


# save as RDS
saveRDS(acous_param_l, "./data/processed/acoustic_parameters_all_groups_all_warbler_acoustic_measurements.RDS")

Run random forest all groups

# read acoustic parameter data
acous_param_l <- readRDS("./data/processed/acoustic_parameters_all_groups_all_warbler_acoustic_measurements.RDS")

# minimum sample size per group
min_n <- sapply(acous_param_l, function(x) min(table(x$indiv)))

# remove grpoups with less than 10 observations for minimum sample size 
acous_param_l <- acous_param_l[!names(acous_param_l) %in% names(min_n)[min_n < 10]]

# which parameters would be measured
param_categories <- c("mfcc",  "spxc", "mfxc", "dtw", "sp")

 # get actual parameter names
 col_names <- names(acous_param_l[[1]])
 col_names <- col_names[col_names != "experiment"]
 
 # measurement category for each measruremnt
 clss_col_names <- col_names
 
# name it by measurement function 
 for (i in  1:length(param_categories))
   clss_col_names[if (param_categories[i] != "sp") 
                              grepl(c("cc", "spxc", "mfxc", "dtw")[i], col_names) else
                              !grepl("cc|xc|dtw|indiv|sound.files|selec", col_names)] <- param_categories[i]



# all posible combinations of 2 and 3 parameters
combs4 <- combn(param_categories, 4)
combs3 <- combn(param_categories, 3)
combs2 <- combn(param_categories, 2)

# ake it a list
combs <- c(as.data.frame(combs4), as.data.frame(combs3), as.data.frame(combs2))

# add all 4 parameters as an option to the list
combs <- c(append(combs, list(param_categories)), param_categories)


for (i in 1:length(combs)) {
  
  rds_name <- paste0("./data/processed/averaged_random_forest_models_from_solo_flights_", paste(combs[[i]], collapse = "-"), ".RDS")
  
  # run if file doesn't exist
  if (!file.exists(rds_name)){
    
# avg_mods <- list()
# loop over list of acoustic parameters
avg_mods <- pblapply(names(acous_param_l),
# cl = .Options$warbleR$parallel,
cl = 1,
function(x){
  # for (x in names(acous_param_l)){
    print(x)              
      # extract data
  X <- acous_param_l[[which(names(acous_param_l) == x)]]
  
  # only solo flight
  solo_rf_input <- X[X$experiment == "vuelo solo", ]

  # rename rows for sel_rows
  rownames(solo_rf_input) <- 1:nrow(solo_rf_input)
  
  # order by sound file column
  solo_rf_input <- solo_rf_input[order(solo_rf_input$sound.files), ]
  
  # remove experiment column
  solo_rf_input$experiment <- NULL
 
  # subset columns to keep only those from selected acoustic measurements 
  solo_rf_input <- solo_rf_input[ , col_names[clss_col_names %in% c(combs[[i]], "sound.files", "selec", "indiv")]]
 
  # run random forest, set a seed to make it replicable
  rf_results <- pblapply(1:reps, function(x) balanced_rf(X = solo_rf_input, num.trees = num.trees, seed = x), cl = 1)
  
  # merge together predictions by sound files
  rf_preds <- lapply(rf_results, function(x){
    mrg <- merge(data.frame(sound.files = solo_rf_input$sound.files), x$predictions[, grep("indiv$", names(x$predictions), invert = TRUE)], all.x = TRUE)

  mrg <- mrg[order(mrg$sound.files), -1]   
  }
 )
  
  # add column (individual) if not found 
  rf_preds <- lapply(rf_preds, function(x){
    
    if(ncol(x) < length(unique(solo_rf_input$indiv))){
      # how many columns are missing
      mssng <- length(unique(solo_rf_input$indiv)) - ncol(x)
      
      # add missing columns
      for(i in 1:(mssng)) x <- data.frame(x, NA, check.names = FALSE)
    names(x)[(ncol(x) - mssng + 1):ncol(x)] <- setdiff(unique(solo_rf_input$indiv), names(x)) 
    }
    return(x)
  })
  
  # get together predictions from the same individual
  preds_by_indv <- lapply(1:ncol(rf_preds[[1]]), function(y)
    do.call(cbind, lapply(rf_preds, "[", y)) 
  )
   
  agg_preds <- as.data.frame(lapply(preds_by_indv, rowMeans, na.rm = TRUE))  
  
  # add individual name to columns
  names(agg_preds) <- names(rf_preds[[1]])
  
  # add sound file column
  agg_preds$sound.files <- solo_rf_input$sound.files
  
    # get predicted indiv from aggregated probabilities 
  agg_preds$pred_indiv <- apply(agg_preds[, sapply(agg_preds, is.numeric)], 1, function(x) colnames(agg_preds)[which.max(x)])

  # make it a factor
  pred_indiv <- factor(agg_preds$pred_indiv, levels = unique(solo_rf_input$indiv))
  agg_preds$actual_indiv <- actual_indiv <- factor(solo_rf_input$indiv, levels = unique(solo_rf_input$indiv))
  
  # get confusion matrix
  cm_solo <- confusionMatrix(pred_indiv, reference = actual_indiv)
  
  ### NULL MODEL
  # run null model by randomizing indiv labels
  rf_null_results <- pblapply(1:reps, function(x) balanced_rf(X = solo_rf_input, num.trees = num.trees, random = TRUE, seed = x), cl = 1)
  
  # get accuracies form null models  
    # merge together predictions by sound files
  rf_null_preds <- lapply(rf_null_results, function(x){
    mrg <- merge(data.frame(sound.files = solo_rf_input$sound.files), x$predictions[, grep("indiv$", names(x$predictions), invert = TRUE)], all.x = TRUE)

  mrg <- mrg[order(mrg$sound.files), -1]   
  }
 )

  # add column (individual) if not found 
  rf_null_preds <- lapply(rf_null_preds, function(x){
    
    if(ncol(x) < length(unique(solo_rf_input$indiv))){
      # how many columns are missing
      mssng <- length(unique(solo_rf_input$indiv)) - ncol(x)
      
      # add missing columns
      for(i in 1:(mssng)) x <- data.frame(x, NA, check.names = FALSE)
    
      names(x)[(ncol(x) - mssng + 1):ncol(x)] <- setdiff(unique(solo_rf_input$indiv), names(x)) 
    }
    return(x)
  })
  
  # get together predictions from the same individual
  preds_by_indv_null <- lapply(1:ncol(rf_null_preds[[1]]), function(y)
    do.call(cbind, lapply(rf_null_preds, "[", y)) 
  )
   
  agg_preds_null <- as.data.frame(lapply(preds_by_indv_null, rowMeans, na.rm = TRUE))  
  
  # add individual name to columns
  names(agg_preds_null) <- names(rf_null_preds[[1]])
  
  # add sound file column
  agg_preds_null$sound.files <- solo_rf_input$sound.files
  
    # get predicted indiv from aggregated probabilities 
  agg_preds_null$pred_indiv <- apply(agg_preds_null[, sapply(agg_preds_null, is.numeric)], 1, function(x) colnames(agg_preds_null)[which.max(x)])

  # make it a factor
  pred_indiv_null <- factor(agg_preds_null$pred_indiv, levels = unique(solo_rf_input$indiv))
  actual_indiv <- factor(solo_rf_input$indiv, levels = unique(solo_rf_input$indiv))
  
  # get confusion matrix
  cm_solo_null <- confusionMatrix(pred_indiv_null, reference = actual_indiv)
  
  ### NOTE: ranger() OOB prediction error and confusionMatrix() Accuracy are the same
  # put all results together
  output <- list(group = x, accuracy = cm_solo$overall[1], null_accuracy = cm_solo_null$overall[1], aggregated_predictions = agg_preds, conf_matrix = cm_solo, random_forests = rf_results, n_indiv = length(unique(solo_rf_input$indiv)), min_n = min(table(solo_rf_input$indiv)), parameters = combs[[i]])

  # avg_mods[[length(avg_mods) + 1]] <- output
  # rm(output)
  
return(output)
  }
)

# add group name to list
names(avg_mods) <- names(acous_param_l)

# save as RDS
saveRDS(avg_mods, rds_name)
  }
}

Predict solo flight optimizing sensitivity for different acoustic parameter sets

  • Prediction with all solo flight data with a sensitivy threshold of 0.86 for different acoustic parameters as predictors
# read rf outputs

rds_rf_outputs <- list.files(path = "./data/processed", pattern = "averaged_random_forest_models", 
    full.names = TRUE)

model_diagnostics_l <- pblapply(rds_rf_outputs, cl = 1, function(x) {

    # read data
    avg_mods <- readRDS(x)

    sensitivities <- sensitivity_fun(X = avg_mods, parameters = paste(avg_mods[[1]]$parameters, 
        collapse = "-"))

    # calculate threshold at cutoff
    thresh_prob <- min(sensitivities$thresholds[sensitivities$mean.sensitivity >= 
        cutoff])

    avg_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "accuracy"))

    null_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "null_accuracy"))

    filtered_accuracy <- unlist(sensitivities[sensitivities$thresholds == thresh_prob, 
        1:length(null_accuracy)])

    diagnostics <- data.frame(group = gsub("X", "", names(filtered_accuracy)), model = rep(c("real data", 
        "null model", "filtered model"), each = length(avg_accuracy)), accuracy = c(avg_accuracy, 
        null_accuracy, filtered_accuracy), acoustic_parametes = gsub("./data/processed/averaged_random_forest_models_from_solo_flights_|.RDS", 
        "", x))

    diagnostics$model <- factor(diagnostics$model, levels = c("null model", "filtered model", 
        "real data"))

    return(list(diagnostics = diagnostics, sensitivities = sensitivities))
})

# put in a data frame
model_diagnostics <- do.call(rbind, lapply(model_diagnostics_l, "[[", 1))


saveRDS(model_diagnostics, "./data/processed/random_forests_diagnostics_several_acoustic_parameter_combinations_solo_flight.RDS")

# save sensitivities

senstivities_l <- lapply(model_diagnostics_l, "[[", 2)

# determine all column names in all selection tables
cnms <- unique(unlist(lapply(senstivities_l, names)))

# add columns that are missing to each selection table
senstivities_l <- lapply(senstivities_l, function(X) {
    nms <- names(X)
    if (length(nms) != length(cnms)) 
        for (i in cnms[!cnms %in% nms]) {
            X <- data.frame(X, NA, stringsAsFactors = FALSE, check.names = FALSE)
            names(X)[ncol(X)] <- i
        }
    return(X)
})


model_sensitivities <- do.call(rbind, senstivities_l)


saveRDS(model_sensitivities, "./data/processed/random_forests_sensitivities_several_acoustic_parameter_combinations_solo_flight.RDS")
# read diagnostic
model_diagnostics <- readRDS("./data/processed/random_forests_diagnostics_several_acoustic_parameter_combinations_solo_flight.RDS")

# make factor to order plot x axis ticks
model_diagnostics$acoustic_parametes <- factor(model_diagnostics$acoustic_parametes, 
    levels = sort(unique(model_diagnostics$acoustic_parametes)))

# make it numeric for points/lines
model_diagnostics$model <- factor(model_diagnostics$model)
model_diagnostics$model_num <- as.numeric(model_diagnostics$model)
model_diagnostics$model_num[model_diagnostics$model != "null model"] <- 2


# density plots
ggplot(model_diagnostics[model_diagnostics$model != "filtered model", ], aes(x = accuracy, 
    fill = model)) + geom_density(alpha = 0.4) + theme_classic() + scale_fill_viridis_d() + 
    ggtitle("Original (non-filtered) accuracies") + labs(x = "Mean accuracy", y = "Frequency") + 
    facet_wrap(~acoustic_parametes, ncol = 4)

ggplot(model_diagnostics[model_diagnostics$model != "real data", ], aes(x = accuracy, 
    fill = model)) + geom_density(alpha = 0.4) + theme_classic() + scale_fill_viridis_d() + 
    ggtitle("Optimized accuracies") + labs(x = "Mean accuracy", y = "Frequency") + 
    facet_wrap(~acoustic_parametes, ncol = 4)

# 
sensitivities <- readRDS("./data/processed/random_forests_sensitivities_several_acoustic_parameter_combinations_solo_flight.RDS")


# Probability threshold vs Mean sensitivty ggplot(data = sensitivities, aes(x =
# thresholds, y = mean.sensitivity)) + geom_hline(yintercept = cutoff, col =
# magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + # geom_vline(xintercept =
# thresh_prob, col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) +
# geom_point(col = magma(10, alpha = 0.7)[2]) + geom_line(col = magma(10, alpha =
# 0.7)[2]) + labs(y = 'Mean sensitivty', x = 'Probability threshold') +
# theme_classic() + # annotate(geom = 'text', x = thresh_prob + 0.04, y = 0.9,
# label = round(thresh_prob, 2)) + facet_wrap(~ parameters, ncol = 4) # calculate
# threshold at n_call_cutoff <-
# max(sensitivities$n_calls_prop[sensitivities$mean.sensitivity >= cutoff])


# # Probability threshold vs Proportion of calls used ggplot(data =
# sensitivities, aes(x = thresholds, y = n_calls_prop)) + geom_hline(yintercept =
# n_call_cutoff, col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + #
# geom_vline(xintercept = thresh_prob, col = magma(10, alpha = 0.7)[8], size =
# 1.6, lty = 3) + geom_point(col = magma(10, alpha = 0.7)[2]) + geom_line(col =
# magma(10, alpha = 0.7)[2]) + labs(y = 'Proportion of calls used', x =
# 'Probability threshold') + theme_classic() + # annotate(geom = 'text', x =
# n_call_cutoff + 0.04, y = 0.9, label = round(n_call_cutoff, 2)) facet_wrap(~
# parameters, ncol = 4) ggplot(data = sensitivities, aes(x = mean.sensitivity, y
# = n_calls_prop)) + geom_hline(yintercept = n_call_cutoff, col = magma(10, alpha
# = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = cutoff, col =
# magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10,
# alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y =
# 'Proportion of calls used', x = 'Sensitivity') + theme_classic() + #
# annotate(geom = 'text', y = n_call_cutoff + 0.04, x = cutoff, label =
# round(n_call_cutoff, 2)) facet_wrap(~ parameters, ncol = 4)



maxs <- sapply(split(sensitivities, sensitivities$parameters), function(Z) max(Z$n_calls_prop[Z$mean.sensitivity >= 
    cutoff]))

max_prop_calls <- data.frame(data_set = names(maxs), max_prop_calls = maxs)

max_prop_calls <- max_prop_calls[order(-max_prop_calls$max_prop_calls), ]

df1 <- knitr::kable(max_prop_calls, row.names = FALSE, escape = FALSE, format = "html", 
    digits = 2)

kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
    full_width = FALSE, font_size = 18)
data_set max_prop_calls
mfcc-spxc-mfxc-sp 0.98
mfcc-spxc-mfxc-dtw-sp 0.97
mfcc-mfxc-dtw-sp 0.96
mfcc-mfxc-sp 0.95
mfcc-spxc-sp 0.94
mfcc-spxc-dtw-sp 0.93
mfcc-spxc-mfxc-dtw 0.93
mfcc-sp 0.92
mfcc-dtw-sp 0.92
mfcc-spxc-mfxc 0.92
mfcc-mfxc 0.91
mfcc-mfxc-dtw 0.91
spxc-mfxc 0.91
spxc-mfxc-dtw 0.89
spxc-mfxc-dtw-sp 0.88
spxc-mfxc-sp 0.87
mfcc-spxc-dtw 0.86
mfcc-spxc 0.86
spxc-sp 0.83
spxc-dtw-sp 0.83
mfcc 0.83
mfxc-dtw-sp 0.82
mfcc-dtw 0.81
sp-mfcc-dtw 0.79
spxc 0.79
mfxc-sp 0.78
spxc-dtw 0.73
dtw-sp 0.71
mfxc 0.71
sp 0.69
mfxc-dtw 0.68
dtw 0.01

Predict solo flight optimizing sensitivity

  • Prediction with all solo flight data with a sensitivy threshold of 0.86 for acoustic parameters: mfcc-spxc-mfxc-sp
# read rf outputs
avg_mods <- readRDS(paste0("./data/processed/averaged_random_forest_models_from_solo_flights_", 
    max_prop_calls$data_set[1], ".RDS"))

sensitivities <- sensitivity_fun(X = avg_mods, parameters = "mfcc-sp")

# calculate threshold at cutoff
thresh_prob <- min(sensitivities$thresholds[sensitivities$mean.sensitivity >= cutoff])

avg_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "accuracy"))

null_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "null_accuracy"))

filtered_accuracy <- unlist(sensitivities[sensitivities$thresholds == thresh_prob, 
    1:length(null_accuracy)])

df <- data.frame(group = gsub("X", "", names(filtered_accuracy)), model = rep(c("real data", 
    "null model", "filtered model"), each = length(avg_accuracy)), accuracy = c(avg_accuracy, 
    null_accuracy, filtered_accuracy))

df$model <- factor(df$model, levels = c("null model", "filtered model", "real data"))

# violin plots
ggplot(df[df$model != "filtered model", ], aes(y = accuracy, x = model, fill = model)) + 
    geom_violin() + theme_classic() + scale_fill_viridis_d(alpha = 0.4) + ggtitle("Original accuracies") + 
    labs(y = "Mean accuracy", x = "Model") + geom_point(aes(x = rep(c(2, 1), each = nrow(df)/3)), 
    size = 5, show.legend = FALSE, col = "gray43", alpha = 0.8) + geom_line(aes(x = rep(c(2, 
    1), each = nrow(df)/3), group = group), col = "gray43", alpha = 0.8)

ggplot(df[df$model != "real data", ], aes(y = accuracy, x = model, fill = model)) + 
    geom_violin() + theme_classic() + scale_fill_viridis_d(alpha = 0.4) + ggtitle("Optimized accuracies") + 
    labs(y = "Mean accuracy", x = "Model") + geom_point(aes(x = rep(c(1, 2), each = nrow(df)/3)), 
    size = 3, show.legend = FALSE, col = "gray43", alpha = 0.8) + geom_line(aes(x = rep(c(1, 
    2), each = nrow(df)/3), group = group), col = "gray43", alpha = 0.8)

# density plots
ggplot(df[df$model != "filtered model", ], aes(x = accuracy, fill = model)) + geom_density(alpha = 0.4) + 
    theme_classic() + scale_fill_viridis_d() + ggtitle("Original (non-filtered) accuracies") + 
    labs(x = "Mean accuracy", y = "Frequency")

ggplot(df[df$model != "real data", ], aes(x = accuracy, fill = model)) + geom_density(alpha = 0.4) + 
    theme_classic() + scale_fill_viridis_d() + ggtitle("Optimized accuracies") + 
    labs(x = "Mean accuracy", y = "Frequency")

# Probability threshold vs Mean sensitivty
ggplot(data = sensitivities, aes(x = thresholds, y = mean.sensitivity)) + geom_hline(yintercept = cutoff, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = thresh_prob, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, 
    alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Mean sensitivty", 
    x = "Probability threshold") + theme_classic() + annotate(geom = "text", x = thresh_prob + 
    0.04, y = 0.9, label = round(thresh_prob, 2))

# calculate threshold at
n_call_cutoff <- max(sensitivities$n_calls_prop[sensitivities$mean.sensitivity >= 
    cutoff])

# Probability threshold vs Proportion of calls used
ggplot(data = sensitivities, aes(x = thresholds, y = n_calls_prop)) + geom_hline(yintercept = n_call_cutoff, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = thresh_prob, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, 
    alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Proportion of calls used", 
    x = "Probability threshold") + theme_classic() + annotate(geom = "text", x = n_call_cutoff + 
    0.04, y = 0.9, label = round(n_call_cutoff, 2))

ggplot(data = sensitivities, aes(x = mean.sensitivity, y = n_calls_prop)) + geom_hline(yintercept = n_call_cutoff, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = cutoff, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, 
    alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Proportion of calls used", 
    x = "Sensitivity") + theme_classic() + annotate(geom = "text", y = n_call_cutoff + 
    0.04, x = cutoff, label = round(n_call_cutoff, 2))

# # remove group 42 AND 6 avg_mods <- avg_mods[!names(avg_mods) %in% c('42', '6',
# '21', '29')] # calculate sensitivities sensitivities <- sensitivity_fun(X =
# avg_mods, parameters = max_prop_calls$data_set[1]) # calculate threshold at
# cutoff thresh_prob <-
# min(sensitivities$thresholds[sensitivities$mean.sensitivity >= cutoff])
# avg_accuracy <- sapply(avg_mods, '[[', which(names(avg_mods[[1]]) ==
# 'accuracy')) null_accuracy <- sapply(avg_mods, '[[', which(names(avg_mods[[1]])
# == 'null_accuracy')) filtered_accuracy <-
# unlist(sensitivities[sensitivities$thresholds == thresh_prob,
# 1:length(null_accuracy)]) df <- data.frame(group = gsub('X', '',
# names(filtered_accuracy)), model = rep(c('real data', 'null model', 'filtered
# model'), each = length(avg_accuracy)), accuracy = c(avg_accuracy,
# null_accuracy, filtered_accuracy)) df$model <- factor(df$model, levels =
# c('null model', 'filtered model', 'real data')) # violin plots
# ggplot(df[df$model != 'filtered model', ], aes(y = accuracy, x = model, fill =
# model)) + geom_violin() + theme_classic() + scale_fill_viridis_d(alpha = 0.4) +
# ggtitle('Original accuracies') + labs(y = 'Mean accuracy', x = 'Model') +
# geom_point(aes(x = rep(c(2, 1), each = nrow(df)/3)), size = 5, show.legend =
# FALSE, col = 'gray43', alpha = 0.8) + geom_line(aes(x = rep(c(2, 1), each =
# nrow(df)/3), group = group), col = 'gray43', alpha = 0.8) ggplot(df[df$model !=
# 'real data', ], aes(y = accuracy, x = model, fill = model)) + geom_violin() +
# theme_classic() + scale_fill_viridis_d(alpha = 0.4) + ggtitle('Optimized
# accuracies') + labs(y = 'Mean accuracy', x = 'Model') + geom_point(aes(x =
# rep(c(1, 2), each = nrow(df)/3)), size = 3, show.legend = FALSE, col =
# 'gray43', alpha = 0.8) + geom_line(aes(x = rep(c(1, 2), each = nrow(df)/3),
# group = group), col = 'gray43', alpha = 0.8) # density plots ggplot(df[df$model
# != 'filtered model', ], aes(x = accuracy, fill = model)) + geom_density(alpha =
# 0.4) + theme_classic() + scale_fill_viridis_d() + ggtitle('Original
# (non-filtered) accuracies') + labs(x = 'Mean accuracy', y = 'Frequency')
# ggplot(df[df$model != 'real data', ], aes(x = accuracy, fill = model)) +
# geom_density(alpha = 0.4) + theme_classic() + scale_fill_viridis_d() +
# ggtitle('Optimized accuracies') + labs(x = 'Mean accuracy', y = 'Frequency')
# ggplot(data = sensitivities, aes(x = thresholds, y = mean.sensitivity)) +
# geom_hline(yintercept = cutoff, col = magma(10, alpha = 0.7)[8], size = 1.6,
# lty = 3) + geom_vline(xintercept = thresh_prob, col = magma(10, alpha =
# 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, alpha = 0.7)[2]) +
# geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = 'Mean sensitivty', x =
# 'Probability threshold') + theme_classic() + annotate(geom = 'text', x =
# thresh_prob + 0.04, y = 0.9, label = round(thresh_prob, 2)) # calculate
# threshold at n_call_cutoff <-
# max(sensitivities$n_calls_prop[sensitivities$mean.sensitivity >= cutoff])
# ggplot(data = sensitivities, aes(x = thresholds, y = n_calls_prop)) +
# geom_hline(yintercept = n_call_cutoff, col = magma(10, alpha = 0.7)[8], size =
# 1.6, lty = 3) + geom_vline(xintercept = thresh_prob, col = magma(10, alpha =
# 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, alpha = 0.7)[2]) +
# geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = 'Proportion of calls
# used', x = 'Probability threshold') + theme_classic() + annotate(geom = 'text',
# x = n_call_cutoff + 0.04, y = 0.9, label = round(n_call_cutoff, 2)) ggplot(data
# = sensitivities, aes(x = mean.sensitivity, y = n_calls_prop)) +
# geom_hline(yintercept = n_call_cutoff, col = magma(10, alpha = 0.7)[8], size =
# 1.6, lty = 3) + geom_vline(xintercept = cutoff, col = magma(10, alpha =
# 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, alpha = 0.7)[2]) +
# geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = 'Proportion of calls
# used', x = 'Sensitivity') + theme_classic() + annotate(geom = 'text', y =
# n_call_cutoff + 0.04, x = cutoff, label = round(n_call_cutoff, 2))
  • Probability threshold of 0.28

  • Prediction removing groups 42 and 6 (low sensitivity)
  • Sensitivy threshold of 0.86
# remove group 42 AND 6
avg_mods <- avg_mods[!names(avg_mods) %in% c("42", "6", "21", "29")]

# calculate sensitivities
sensitivities <- sensitivity_fun(X = avg_mods, parameters = max_prop_calls$data_set[1])

# calculate threshold at cutoff
thresh_prob <- min(sensitivities$thresholds[sensitivities$mean.sensitivity >= cutoff])

avg_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "accuracy"))

null_accuracy <- sapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "null_accuracy"))

filtered_accuracy <- unlist(sensitivities[sensitivities$thresholds == thresh_prob, 
    1:length(null_accuracy)])

df <- data.frame(group = gsub("X", "", names(filtered_accuracy)), model = rep(c("real data", 
    "null model", "filtered model"), each = length(avg_accuracy)), accuracy = c(avg_accuracy, 
    null_accuracy, filtered_accuracy))

df$model <- factor(df$model, levels = c("null model", "filtered model", "real data"))

# violin plots
ggplot(df[df$model != "filtered model", ], aes(y = accuracy, x = model, fill = model)) + 
    geom_violin() + theme_classic() + scale_fill_viridis_d(alpha = 0.4) + ggtitle("Original accuracies") + 
    labs(y = "Mean accuracy", x = "Model") + geom_point(aes(x = rep(c(2, 1), each = nrow(df)/3)), 
    size = 5, show.legend = FALSE, col = "gray43", alpha = 0.8) + geom_line(aes(x = rep(c(2, 
    1), each = nrow(df)/3), group = group), col = "gray43", alpha = 0.8)

ggplot(df[df$model != "real data", ], aes(y = accuracy, x = model, fill = model)) + 
    geom_violin() + theme_classic() + scale_fill_viridis_d(alpha = 0.4) + ggtitle("Optimized accuracies") + 
    labs(y = "Mean accuracy", x = "Model") + geom_point(aes(x = rep(c(1, 2), each = nrow(df)/3)), 
    size = 3, show.legend = FALSE, col = "gray43", alpha = 0.8) + geom_line(aes(x = rep(c(1, 
    2), each = nrow(df)/3), group = group), col = "gray43", alpha = 0.8)

# density plots
ggplot(df[df$model != "filtered model", ], aes(x = accuracy, fill = model)) + geom_density(alpha = 0.4) + 
    theme_classic() + scale_fill_viridis_d() + ggtitle("Original (non-filtered) accuracies") + 
    labs(x = "Mean accuracy", y = "Frequency")

ggplot(df[df$model != "real data", ], aes(x = accuracy, fill = model)) + geom_density(alpha = 0.4) + 
    theme_classic() + scale_fill_viridis_d() + ggtitle("Optimized accuracies") + 
    labs(x = "Mean accuracy", y = "Frequency")

ggplot(data = sensitivities, aes(x = thresholds, y = mean.sensitivity)) + geom_hline(yintercept = cutoff, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = thresh_prob, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, 
    alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Mean sensitivty", 
    x = "Probability threshold") + theme_classic() + annotate(geom = "text", x = thresh_prob + 
    0.04, y = 0.9, label = round(thresh_prob, 2))

# calculate threshold at
n_call_cutoff <- max(sensitivities$n_calls_prop[sensitivities$mean.sensitivity >= 
    cutoff])


ggplot(data = sensitivities, aes(x = thresholds, y = n_calls_prop)) + geom_hline(yintercept = n_call_cutoff, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = thresh_prob, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, 
    alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Proportion of calls used", 
    x = "Probability threshold") + theme_classic() + annotate(geom = "text", x = n_call_cutoff + 
    0.04, y = 0.9, label = round(n_call_cutoff, 2))

ggplot(data = sensitivities, aes(x = mean.sensitivity, y = n_calls_prop)) + geom_hline(yintercept = n_call_cutoff, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_vline(xintercept = cutoff, 
    col = magma(10, alpha = 0.7)[8], size = 1.6, lty = 3) + geom_point(col = magma(10, 
    alpha = 0.7)[2]) + geom_line(col = magma(10, alpha = 0.7)[2]) + labs(y = "Proportion of calls used", 
    x = "Sensitivity") + theme_classic() + annotate(geom = "text", y = n_call_cutoff + 
    0.04, x = cutoff, label = round(n_call_cutoff, 2))

  • Probability threshold of 0.21

Predict group flight for experiments without playback

  • Sensitivy threshold of 0.86 derived from solo flight optimization, which is produced by a probability threshold of 0.21
# read acoustic parameter data
acous_param_l <- readRDS("./data/processed/acoustic_parameters_all_groups_all_warbler_acoustic_measurements.RDS")

acous_param_l <- acous_param_l[names(acous_param_l) %in% names(avg_mods)]

# extract random forests from acous_param_l list
random_forests_l <- lapply(avg_mods, "[[", which(names(avg_mods[[1]]) == "random_forests"))

agg_preds_l <- lapply(names(random_forests_l), function(x) {

    # print(x)
    Z <- acous_param_l[[x]]
    Y <- Z[Z$experiment == "vuelo grupal/sin sonido", ]
    Y$indiv <- NULL

    # random forest models for this group
    rfms <- random_forests_l[[x]]

    # predict using all random forest models
    rf_preds <- lapply(rfms, function(x) predict(object = x, data = Y)$predictions)

    # add column (individual) if not found
    rf_preds <- lapply(rf_preds, function(x) {

        if (ncol(x) < length(unique(Z$indiv[Z$experiment == "vuelo solo"]))) {
            # how many columns are missing
            mssng <- length(unique(Z$indiv[Z$experiment == "vuelo solo"])) - ncol(x)

            # add missing columns with 0
            for (i in 1:(mssng)) x <- data.frame(x, 0, check.names = FALSE)
            names(x)[(ncol(x) - mssng + 1):ncol(x)] <- setdiff(unique(Z$indiv[Z$experiment == 
                "vuelo solo"]), names(x))
        }
        return(x)
    })

    # get together predictions from the same individual
    preds_by_indv <- lapply(1:ncol(rf_preds[[1]]), function(y) do.call(cbind, lapply(rf_preds, 
        function(e) e[, y])))

    agg_preds <- as.data.frame(lapply(preds_by_indv, rowMeans, na.rm = TRUE))

    # add individual name to columns
    names(agg_preds) <- colnames(rf_preds[[1]])

    # add sound file column
    agg_preds$sound.files <- Y$sound.files

    # get predicted indiv from aggregated probabilities
    agg_preds$pred_indiv <- apply(agg_preds[, sapply(agg_preds, is.numeric)], 1, 
        function(x) colnames(agg_preds)[which.max(x)])

    agg_preds$group <- x
    agg_preds$max_prob <- apply(agg_preds[, sapply(agg_preds, is.numeric)], 1, max)

    return(agg_preds)
})

# add group name to list
names(agg_preds_l) <- names(acous_param_l)


agg_pred <- do.call(rbind, lapply(agg_preds_l, function(x) x[, c("group", "sound.files", 
    "max_prob", "pred_indiv")]))

# remove those groups with low sensitivity
agg_pred <- agg_pred[!agg_pred$group %in% c("42", "6"), ]

# save as RDS
saveRDS(agg_pred, "./data/processed/predicted_individual_in_group_flights.RDS")
# read as RDS
agg_pred <- readRDS("./data/processed/predicted_individual_in_group_flights.RDS")

# get summary by group
summ_by_groups <- lapply(unique(agg_pred$group), function(x) {

    Y <- agg_pred[agg_pred$group == x, ]

    # total number of calls used
    n_used_calls <- sum(Y$max_prob > thresh_prob)

    # proportion of calls used
    prop_used_calls <- sum(Y$max_prob > thresh_prob)/nrow(Y)

    return(data.frame(group = x, n_used_calls, total_calls = nrow(Y), prop_used_calls))

})

summ_by_group <- do.call(rbind, summ_by_groups)

# add row with total across all groups
total_row <- data.frame(group = "All groups", n_used_calls = sum(summ_by_group$n_used_calls), 
    total_calls = sum(summ_by_group$total_calls))

total_row$prop_used_calls <- total_row$n_used_calls/total_row$total_calls

# bind totals to summary table
summ_by_group <- rbind(summ_by_group, total_row)

# print pretty table
df1 <- knitr::kable(summ_by_group, row.names = TRUE, escape = FALSE, format = "html", 
    digits = 2)

df1 <- kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", 
    "responsive"), full_width = FALSE, font_size = 18)

row_spec(df1, row = nrow(summ_by_group), bold = TRUE, color = "white", background = viridis(10)[8])
group n_used_calls total_calls prop_used_calls
1 4 69 69 1
2 3 176 176 1
3 1 46 46 1
4 40 3 3 1
5 9 39 39 1
6 15 59 59 1
7 13 5 5 1
8 18-A-B 20 20 1
9 6/? 72 72 1
10 12/12/N/N 15 15 1
11 12/NN 58 58 1
12 All groups 562 562 1

Analyze calling activity in solo vs group flight

Using all calls, including those with low class probabilities

# read acoustic parameter data
acous_param_l <- readRDS("./data/processed/acoustic_parameters_all_groups_all_warbler_acoustic_measurements.RDS")

# exclude groups with low sensitivity (42 & 6) or low sample size after removing
# low prob calls (29)
acous_param_l <- acous_param_l[!names(acous_param_l) %in% c("42", "6", "21", "29")]

# get group and solo call counts per individual
sl_vs_grps <- lapply(names(acous_param_l), function(x) {

    acous_param <- acous_param_l[[x]]

    group_file <- acous_param$sound.files[acous_param$experiment == "vuelo grupal/sin sonido"][1]

    # get sound file number for group flights
    group_file <- strsplit(group_file, ".wav")[[1]][1]
    group_file_num <- as.numeric(substr(group_file, start = nchar(group_file) - 6, 
        nchar(group_file)))

    # get data for solo flights
    solo <- acous_param[acous_param$experiment == "vuelo solo", ]

    sl_grp_cll <- lapply(unique(solo$indiv), function(y) {

        solo_calls <- sum(solo$indiv == y)

        group_calls <- sum(agg_pred$group == x & agg_pred$pred_indiv == y)

        # get sound file number for group flights
        solo_file <- solo$sound.files[solo$indiv == y][1]
        solo_file <- strsplit(solo_file, ".wav")[[1]][1]
        solo_file_num <- as.numeric(substr(solo_file, start = nchar(solo_file) - 
            6, nchar(solo_file)))

        return(data.frame(group = x, indiv = y, experiment = c("solo", "group"), 
            calls = c(solo_calls, group_calls), prop_calls = c(solo_calls/nrow(solo), 
                group_calls/sum(agg_pred$group == x)), sound_file_number = c(solo_file_num, 
                group_file_num)))

    })

    sl_grp_clls <- do.call(rbind, sl_grp_cll)

    return(sl_grp_clls)
})

sl_vs_grp <- do.call(rbind, sl_vs_grps)


# remove 21 due to no group calls left
sl_vs_grp <- sl_vs_grp[sl_vs_grp$group != "21", ]

# add metadata
caps <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx", sheet = "Capturas"))

exps_mtdt <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx", 
    sheet = "Experimento video coor vuelo"))

# time in seconds
exps_mtdt$flight_time <- round(as.numeric(exps_mtdt$`Tiempo de vuelo aprox`) * 60/0.04166667)

# remove NA in audio
exps_mtdt <- exps_mtdt[!is.na(exps_mtdt$Audio), ]

## Add flight time for solo and group flight
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {

    out <- if (sl_vs_grp$experiment[x] == "solo") 
        unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Individuo == sl_vs_grp$indiv[x] & 
            exps_mtdt$Experimento == "vuelo solo" & !is.na(exps_mtdt$Individuo)])) else unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Grupo == sl_vs_grp$group[x] & 
        exps_mtdt$Experimento == "vuelo grupal/sin sonido"]))

    return(out)
})


# flight time
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {

    ft <- unique(na.omit(exps_mtdt$flight_time[which(as.numeric(exps_mtdt$Audio) == 
        sl_vs_grp$sound_file_number[x])]))

    if (length(ft) == 0) 
        ft <- NA
    return(ft)
})

# add sex
sl_vs_grp$sex <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Sexo[caps$Murci == 
    x])[1], USE.NAMES = FALSE)

sl_vs_grp$sex <- ifelse(sl_vs_grp$sex == "m", "Male", "Female")

# add age
sl_vs_grp$age <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Edad[caps$Murci == 
    x])[1], USE.NAMES = FALSE)

sl_vs_grp$age <- ifelse(sl_vs_grp$age == "sa", "Sub-adult", "Adult")

sl_vs_grp$reprod.stg <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$`Estado reproductivo`[caps$Murci == 
    x])[1], USE.NAMES = FALSE)

sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "p?"] <- "Pregnant"
sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "ne"] <- "Inactive"

# order levels
sl_vs_grp$experiment <- factor(sl_vs_grp$experiment, levels = c("solo", "group"))


sl_vs_grp$call_rate <- sl_vs_grp$calls/(sl_vs_grp$flight_time/60)

# calling rate solo vs group flights ggplot(sl_vs_grp, aes(y = call_rate, x =
# experiment, col = indiv, shape = sex)) + geom_hline(yintercept =
# mean(sl_vs_grp$call_rate[sl_vs_grp$experiment == 'group']), lty = 2, alpha =
# 0.5) + geom_hline(yintercept = mean(sl_vs_grp$call_rate[sl_vs_grp$experiment ==
# 'solo'], na.rm = TRUE), lty = 2, alpha = 0.5) + geom_point(size = 5, alpha =
# 0.8) + theme_classic() + scale_color_viridis_d(alpha = 0.4, guide=FALSE) +
# ggtitle('Change in calling rate by sex and individual') + labs(x =
# 'Experiment', y = 'Calling rate (calls / min)') + geom_line(aes(group = indiv),
# col = 'gray43', alpha = 0.8) + facet_wrap(~ group, nrow = 4)
# Using only calls with class probabilities higher than `r thresh_prob`

# get group and solo call counts per individual
sl_vs_grps <- lapply(names(acous_param_l), function(x) {

    acous_param <- acous_param_l[[x]]

    group_file <- acous_param$sound.files[acous_param$experiment == "vuelo grupal/sin sonido"][1]

    # get sound file number for group flights
    group_file <- strsplit(group_file, ".wav")[[1]][1]
    group_file_num <- as.numeric(substr(group_file, start = nchar(group_file) - 6, 
        nchar(group_file)))

    # get data for solo flights
    solo <- acous_param[acous_param$experiment == "vuelo solo", ]

    sl_grp_cll <- lapply(unique(solo$indiv), function(y) {

        solo_calls <- sum(solo$indiv == y)

        # taking into acount those with class prob higher than
        group_calls <- sum(agg_pred$group == x & agg_pred$pred_indiv == y & agg_pred$max_prob > 
            thresh_prob)

        # get sound file number for group flights
        solo_file <- solo$sound.files[solo$indiv == y][1]
        solo_file <- strsplit(solo_file, ".wav")[[1]][1]
        solo_file_num <- as.numeric(substr(solo_file, start = nchar(solo_file) - 
            6, nchar(solo_file)))

        return(data.frame(group = x, indiv = y, experiment = c("solo", "group"), 
            calls = c(solo_calls, group_calls), prop_calls = c(solo_calls/nrow(solo), 
                group_calls/sum(agg_pred$group == x)), sound_file_number = c(solo_file_num, 
                group_file_num)))

    })

    sl_grp_clls <- do.call(rbind, sl_grp_cll)

    return(sl_grp_clls)
})

sl_vs_grp <- do.call(rbind, sl_vs_grps)


# remove 21 due to no group calls left
sl_vs_grp <- sl_vs_grp[sl_vs_grp$group != "21", ]

# add metadata
caps <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx", sheet = "Capturas"))

exps_mtdt <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx", 
    sheet = "Experimento video coor vuelo"))

# time in seconds
exps_mtdt$flight_time <- round(as.numeric(exps_mtdt$`Tiempo de vuelo aprox`) * 60/0.04166667)

# remove NA in audio
exps_mtdt <- exps_mtdt[!is.na(exps_mtdt$Audio), ]

## Add flight time for solo and group flight
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {

    out <- if (sl_vs_grp$experiment[x] == "solo") 
        unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Individuo == sl_vs_grp$indiv[x] & 
            exps_mtdt$Experimento == "vuelo solo" & !is.na(exps_mtdt$Individuo)])) else unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Grupo == sl_vs_grp$group[x] & 
        exps_mtdt$Experimento == "vuelo grupal/sin sonido"]))

    return(out)
})


# flight time
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {

    ft <- unique(na.omit(exps_mtdt$flight_time[which(as.numeric(exps_mtdt$Audio) == 
        sl_vs_grp$sound_file_number[x])]))

    if (length(ft) == 0) 
        ft <- NA
    return(ft)
})

# add sex
sl_vs_grp$sex <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Sexo[caps$Murci == 
    x])[1], USE.NAMES = FALSE)

sl_vs_grp$sex <- ifelse(sl_vs_grp$sex == "m", "Male", "Female")

# add age
sl_vs_grp$age <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Edad[caps$Murci == 
    x])[1], USE.NAMES = FALSE)

sl_vs_grp$age <- ifelse(sl_vs_grp$age == "sa", "Sub-adult", "Adult")

sl_vs_grp$reprod.stg <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$`Estado reproductivo`[caps$Murci == 
    x])[1], USE.NAMES = FALSE)

sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "p?"] <- "Pregnant"
sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "ne"] <- "Inactive"

# order levels
sl_vs_grp$experiment <- factor(sl_vs_grp$experiment, levels = c("solo", "group"))


sl_vs_grp$call_rate <- sl_vs_grp$calls/(sl_vs_grp$flight_time/60)

# calling rate solo vs group flights ggplot(sl_vs_grp, aes(y = call_rate, x =
# experiment, col = indiv, shape = sex)) + geom_hline(yintercept =
# mean(sl_vs_grp$call_rate[sl_vs_grp$experiment == 'group']), lty = 2, alpha =
# 0.5) + geom_hline(yintercept = mean(sl_vs_grp$call_rate[sl_vs_grp$experiment ==
# 'solo'], na.rm = TRUE), lty = 2, alpha = 0.5) + geom_point(size = 5, alpha =
# 0.8) + theme_classic() + scale_color_viridis_d(alpha = 0.4, guide=FALSE) +
# ggtitle('Change in calling rate by sex and individual') + labs(x =
# 'Experiment', y = 'Calling rate (calls / min)') + geom_line(aes(group = indiv),
# col = 'gray43', alpha = 0.8) + facet_wrap(~ group, nrow = 4)

# violin plots ggplot(sl_vs_grp, aes(y = prop_calls, x = experiment, col = indiv,
# shape = sex)) + geom_point(size = 5, alpha = 0.8) + theme_classic() +
# scale_color_viridis_d(alpha = 0.4, guide=FALSE) + ggtitle('Original
# accuracies') + labs(y = 'Mean accuracy', x = 'Model') + geom_line(aes(group =
# indiv), col = 'gray43', alpha = 0.8) + facet_wrap(~ group, nrow = 4) +
# ggtitle(label = 'Change in calling activity by sex and individual')
# Using only calls with class probabilities higher than `r thresh_prob` and
# assinging low probability calls evenly across individuals (as all calls are
# above the threshold then all calls are being used)

# get group and solo call counts per individual
sl_vs_grps <- lapply(names(acous_param_l), function(x) {

    acous_param <- acous_param_l[[x]]

    group_file <- acous_param$sound.files[acous_param$experiment == "vuelo grupal/sin sonido"][1]

    # get sound file number for group flights
    group_file <- strsplit(group_file, ".wav")[[1]][1]
    group_file_num <- as.numeric(substr(group_file, start = nchar(group_file) - 6, 
        nchar(group_file)))

    # get data for solo flights
    solo <- acous_param[acous_param$experiment == "vuelo solo", ]

    sl_grp_cll <- lapply(unique(solo$indiv), function(y) {

        solo_calls <- sum(solo$indiv == y)

        # taking into acount those with class prob higher than
        group_calls <- sum(agg_pred$group == x & agg_pred$pred_indiv == y & agg_pred$max_prob > 
            thresh_prob)

        # add low probability calls to group calls
        group_calls <- group_calls + sum(agg_pred$group == x & agg_pred$max_prob <= 
            thresh_prob)/length(unique(agg_pred$pred_indiv[agg_pred$group == x]))

        # get sound file number for group flights
        solo_file <- solo$sound.files[solo$indiv == y][1]
        solo_file <- strsplit(solo_file, ".wav")[[1]][1]
        solo_file_num <- as.numeric(substr(solo_file, start = nchar(solo_file) - 
            6, nchar(solo_file)))

        return(data.frame(group = x, indiv = y, experiment = c("solo", "group"), 
            calls = c(solo_calls, group_calls), prop_calls = c(solo_calls/nrow(solo), 
                group_calls/sum(agg_pred$group == x)), sound_file_number = c(solo_file_num, 
                group_file_num), sound_files = c(solo_file, group_file)))

    })

    sl_grp_clls <- do.call(rbind, sl_grp_cll)

    return(sl_grp_clls)
})

sl_vs_grp <- do.call(rbind, sl_vs_grps)

# remove 21 due to no group calls left
sl_vs_grp <- sl_vs_grp[sl_vs_grp$group != "21", ]

# add metadata
caps <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx", sheet = "Capturas"))

exps_mtdt <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_3.xlsx", 
    sheet = "Experimento video coor vuelo"))

# time in seconds
exps_mtdt$flight_time <- round(as.numeric(exps_mtdt$`Tiempo de vuelo aprox`) * 60/0.04166667)

# remove NA in audio
exps_mtdt <- exps_mtdt[!is.na(exps_mtdt$Audio), ]

## Add flight time for solo and group flight
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {

    out <- if (sl_vs_grp$experiment[x] == "solo") 
        unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Individuo == sl_vs_grp$indiv[x] & 
            exps_mtdt$Experimento == "vuelo solo" & !is.na(exps_mtdt$Individuo)])) else unique(na.omit(exps_mtdt$flight_time[exps_mtdt$Grupo == sl_vs_grp$group[x] & 
        exps_mtdt$Experimento == "vuelo grupal/sin sonido"]))

    return(out)
})


# flight time
sl_vs_grp$flight_time <- sapply(1:nrow(sl_vs_grp), function(x) {

    ft <- unique(na.omit(exps_mtdt$flight_time[which(as.numeric(exps_mtdt$Audio) == 
        sl_vs_grp$sound_file_number[x])]))

    if (length(ft) == 0) 
        ft <- NA
    return(ft)
})

# add sex
sl_vs_grp$sex <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Sexo[caps$Murci == 
    x])[1], USE.NAMES = FALSE)

sl_vs_grp$sex <- ifelse(sl_vs_grp$sex == "m", "Male", "Female")

# add age
sl_vs_grp$age <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$Edad[caps$Murci == 
    x])[1], USE.NAMES = FALSE)

sl_vs_grp$age <- ifelse(sl_vs_grp$age == "sa", "Sub-adult", "Adult")

sl_vs_grp$reprod.stg <- sapply(sl_vs_grp$indiv, function(x) na.exclude(caps$`Estado reproductivo`[caps$Murci == 
    x])[1], USE.NAMES = FALSE)

sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "p?"] <- "Pregnant"
sl_vs_grp$reprod.stg[sl_vs_grp$reprod.stg == "ne"] <- "Inactive"

# order levels
sl_vs_grp$experiment <- factor(sl_vs_grp$experiment, levels = c("solo", "group"))


sl_vs_grp$call_rate <- sl_vs_grp$calls/(sl_vs_grp$flight_time/60)

# calling rate solo vs group flights
ggplot(sl_vs_grp[sl_vs_grp$group != "22", ], aes(y = call_rate, x = experiment, col = indiv, 
    shape = sex)) + geom_hline(yintercept = mean(sl_vs_grp$call_rate[sl_vs_grp$experiment == 
    "group"]), lty = 2, alpha = 0.5) + geom_hline(yintercept = mean(sl_vs_grp$call_rate[sl_vs_grp$experiment == 
    "solo"], na.rm = TRUE), lty = 2, alpha = 0.5) + geom_point(size = 5, alpha = 0.8) + 
    theme_classic() + scale_color_viridis_d(alpha = 0.4, guide = FALSE) + ggtitle("Change in calling rate by sex and individual") + 
    labs(x = "Experiment", y = "Calling rate (calls / min)") + geom_line(aes(group = indiv), 
    col = "gray43", alpha = 0.8) + facet_wrap(~group, nrow = 4)

Gaps between calls for solo and group flights

clls <- readRDS("./output/CALLS_fixed_detections_inquiry_calls.RDS")

sl_vs_grp$sound_files <- paste0(sl_vs_grp$sound_files, ".wav")


seltab <- attributes(clls)$check.results

gap_l <- lapply(1:nrow(sl_vs_grp), function(x) {

    Y <- seltab[seltab$orig.sound.files == sl_vs_grp$sound_files[x], ]
    Y <- Y[order(Y$orig.start), ]
    gaps <- Y$orig.start[-1] - Y$orig.end[-nrow(Y)]

    return(data.frame(group = sl_vs_grp$group[x], sound.files = sl_vs_grp$sound_files[x], 
        experiment = sl_vs_grp$experiment[x], gaps = ifelse(length(gaps) == 0, NA, 
            gaps)))

})

gaps <- do.call(rbind, gap_l)

# ggplot(gaps, aes(x = gaps, fill = experiment, group = experiment)) +
# geom_density() + scale_fill_viridis_d(alpha = 0.4) + ggtitle('Gaps sec by
# group') + theme_classic() + facet_wrap(~group, ncol = 4, scales = 'free_y')

ggplot(gaps, aes(x = gaps, fill = experiment, group = experiment)) + geom_density() + 
    ggtitle("Gaps sec groups combined") + scale_fill_viridis_d(alpha = 0.4) + theme_classic()

### Number of calls per individual by group

agg_pred <- readRDS("./data/processed/predicted_individual_in_group_flights.RDS")

# remove repeated
agg_pred <- agg_pred[-c(200, 156, 30, 9, 160, 435, 432), ]

agg_pred$abbr_indiv <- as.numeric(as.factor(agg_pred$pred_indiv))
agg_pred$indiv <- as.factor(agg_pred$abbr_indiv)

agg_indiv <- aggregate(abbr_indiv ~ indiv + group, data = agg_pred, FUN = length)

agg_indiv$indiv <- as.character(agg_indiv$indiv)

agg_indiv <- agg_indiv[order(agg_indiv$group, agg_indiv$abbr_indiv), ]

agg_indiv$indiv <- as.factor(agg_indiv$indiv)

ggplot(agg_indiv, aes(y = abbr_indiv, x = indiv, fill = indiv)) + geom_bar(stat = "identity") + 
    ggtitle("Total calls per individual in group flight") + scale_fill_viridis_d(alpha = 0.7) + 
    facet_wrap(~group, scales = "free", nrow = 4) + guides(fill = FALSE) + theme_classic()

Calls location in group flight by individual

# read as RDS
agg_pred <- readRDS("./data/processed/predicted_individual_in_group_flights.RDS")

agg_pred$orig.sound.files <- paste0(sapply(agg_pred$sound.files, function(x) strsplit(x, 
    ".wav")[[1]][1]), ".wav")

agg_pred$start <- sapply(agg_pred$sound.files, function(x) seltab$orig.start[seltab$sound.files == 
    x])

agg_pred$end <- sapply(agg_pred$sound.files, function(x) seltab$orig.end[seltab$sound.files == 
    x])

agg_pred$middle.time <- (agg_pred$start + agg_pred$end)/2

agg_pred$abbr_indiv <- as.numeric(as.factor(agg_pred$pred_indiv))

agg_pred$group2 <- make.names(agg_pred$group)


agg_pred <- agg_pred[!duplicated(agg_pred[, c("group", "abbr_indiv", "start", "end")]), 
    ]

agg_pred$row <- 1:nrow(agg_pred)

# this are repeated agg_pred[c(200, 206, 156, 219, 30, 31, 9, 60, 160, 225, 486,
# 435, 438, 432), ]

agg_pred <- agg_pred[-c(200, 156, 30, 9, 160, 435, 432), ]

out <- lapply(unique(agg_pred$group2), function(x) {

    print(x)
    Y <- agg_pred[agg_pred$group2 == x, ]

    # jpeg(filename = file.path('img/group_call_through_time', paste0(x, '.jpeg')),
    # width = 480 * 3, height = 480 * 2, res = 200)

    par(mar = c(1, 0, 0, 0), mfrow = c(4, 1))

    Y$middle.time <- Y$middle.time - min(Y$middle.time)

    num.ind <- as.numeric(as.factor(Y$abbr_indiv))

    num.ind <- scale(num.ind, center = TRUE)

    # sq <- seq(0, max(Y$middle.time), length.out = 5)
    sq <- seq(0, 120, length.out = 5)

    for (i in 1:(length(sq) - 1)) {

        plot(0, 0, col = "white", xlim = c(sq[i] - 0.05, sq[i + 1] + 0.05), ylim = range(num.ind) + 
            c(-2, 2), bty = "u")

        points(x = Y$middle.time, y = num.ind, cex = 7, col = viridis(max(as.numeric(as.factor(Y$abbr_indiv))), 
            alpha = 0.5)[as.numeric(as.factor(Y$abbr_indiv))], pch = 20)

        # text(x = Y$middle.time, y = num.ind + rnorm(nrow(Y), sd = 0.8), Y$row, cex =
        # 0.6, col = 'black')
    }

    # dev.off()

})
## [1] "X4"

## [1] "X3"

## [1] "X1"

## [1] "X40"

## [1] "X9"

## [1] "X15"

## [1] "X13"

## [1] "X18.A.B"

## [1] "X6.."

## [1] "X12.12.N.N"

## [1] "X12.NN"

lapply(unique(agg_pred$group), function(x) {

    Y <- agg_pred[agg_pred$group == x, ]

    mc <- markov.test(Y$abbr_indiv, type = "rank")

    return(mc)
})



seq <- simulateMarkovChain(5000, matrix(0.25, 4, 4), states = c("a", "c", "g", "t"))

markov.test(seq)

rnd.seq <- sample(seq)
markov.test(rnd.seq)



sequenceMatr <- createSequenceMatrix(sequence, sanitize = FALSE)
mcFitMLE <- markovchainFit(data = sequence)

rnd.seq <- sample(sequence)

rnd <- createSequenceMatrix(rnd.seq, sanitize = FALSE)
mcFitMLE2 <- markovchainFit(data = rnd.seq)


mcFitBSP <- markovchainFit(data = sequence, method = "bootstrap", nboot = 5, name = "Bootstrap Mc")


sim.sq <- rep(c("a", "b", "c", "d"), 100)

verifyMarkovProperty(sim.sq)

sim.sq2 <- sample(c("a", "b", "c", "d"), size = 100, replace = TRUE)
verifyMarkovProperty(sim.sq2)


weatherStates <- c("sunny", "cloudy", "rain")

byRow <- TRUE

weatherMatrix <- matrix(data = c(0.7, 0.2, 0.1, 0.3, 0.4, 0.3, 0.2, 0.45, 0.35), 
    byrow = byRow, nrow = 3, dimnames = list(weatherStates, weatherStates))

mcWeather <- new("markovchain", states = weatherStates, byrow = byRow, transitionMatrix = weatherMatrix, 
    name = "Weather")

mcIgraph <- as(mcWeather, "igraph")
plot(mcIgraph)

Session information

## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_BR.UTF-8    
##  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_BR.UTF-8   
##  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] spgs_1.0-3         TraMineR_2.2-1     igraph_1.2.6       markovchain_0.8.6 
##  [5] soundgen_2.0.0     shinyBS_0.61       Sim.DiffProc_4.8   rlang_0.4.11      
##  [9] kableExtra_1.3.4   viridis_0.6.1      viridisLite_0.4.0  pbapply_1.4-3     
## [13] e1071_1.7-6        caret_6.0-88       ggplot2_3.3.3      lattice_0.20-44   
## [17] ranger_0.12.1      readxl_1.3.1       warbleR_1.1.27     NatureSounds_1.0.4
## [21] knitr_1.33         seewave_2.1.6      tuneR_1.3.3        devtools_2.4.1    
## [25] usethis_2.0.1     
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.2.1      Hmisc_4.5-0          systemfonts_1.0.1   
##   [4] plyr_1.8.6           splines_4.0.5        digest_0.6.27       
##   [7] foreach_1.5.1        htmltools_0.5.1.1    fansi_0.4.2         
##  [10] checkmate_2.0.0      magrittr_2.0.1       memoise_2.0.0       
##  [13] cluster_2.1.2        remotes_2.3.0        recipes_0.1.16      
##  [16] gower_0.2.2          RcppParallel_5.1.4   svglite_2.0.0       
##  [19] prettyunits_1.1.1    jpeg_0.1-8.1         colorspace_2.0-1    
##  [22] signal_0.7-6         rvest_1.0.0          xfun_0.23           
##  [25] dplyr_1.0.5          callr_3.7.0          crayon_1.4.1        
##  [28] RCurl_1.98-1.3       jsonlite_1.7.2       survival_3.2-11     
##  [31] iterators_1.0.13     glue_1.4.2           gtable_0.3.0        
##  [34] ipred_0.9-11         webshot_0.5.2        pkgbuild_1.2.0      
##  [37] scales_1.1.1         Rcpp_1.0.6           dtw_1.22-3          
##  [40] htmlTable_2.2.1      xtable_1.8-4         foreign_0.8-81      
##  [43] matlab_1.0.2         proxy_0.4-25         Formula_1.2-4       
##  [46] stats4_4.0.5         lava_1.6.9           prodlim_2019.11.13  
##  [49] htmlwidgets_1.5.3    httr_1.4.2           RColorBrewer_1.1-2  
##  [52] ellipsis_0.3.2       farver_2.1.0         pkgconfig_2.0.3     
##  [55] nnet_7.3-16          sass_0.3.1           utf8_1.2.1          
##  [58] labeling_0.4.2       tidyselect_1.1.1     reshape2_1.4.4      
##  [61] later_1.2.0          munsell_0.5.0        cellranger_1.1.0    
##  [64] tools_4.0.5          cachem_1.0.5         cli_2.5.0           
##  [67] generics_0.1.0       evaluate_0.14        stringr_1.4.0       
##  [70] fastmap_1.1.0        yaml_2.2.1           ModelMetrics_1.2.2.2
##  [73] processx_3.5.2       fs_1.5.0             purrr_0.3.4         
##  [76] nlme_3.1-152         mime_0.10            formatR_1.9         
##  [79] xml2_1.3.2           compiler_4.0.5       rstudioapi_0.13     
##  [82] png_0.1-7            testthat_3.0.2       tibble_3.1.2        
##  [85] bslib_0.2.4          stringi_1.6.2        highr_0.9           
##  [88] ps_1.6.0             desc_1.3.0           Matrix_1.3-3        
##  [91] fftw_1.0-6           vctrs_0.3.8          pillar_1.6.1        
##  [94] lifecycle_1.0.0      jquerylib_0.1.4      data.table_1.14.0   
##  [97] bitops_1.0-7         httpuv_1.6.1         R6_2.5.0            
## [100] latticeExtra_0.6-29  promises_1.2.0.1     gridExtra_2.3       
## [103] sessioninfo_1.1.1    codetools_0.2-18     boot_1.3-28         
## [106] MASS_7.3-54          pkgload_1.2.1        rprojroot_2.0.2     
## [109] rjson_0.2.20         withr_2.4.2          Deriv_4.1.3         
## [112] expm_0.999-6         parallel_4.0.5       grid_4.0.5          
## [115] rpart_4.1-15         timeDate_3043.102    class_7.3-19        
## [118] rmarkdown_2.8        pROC_1.17.0.1        shiny_1.6.0         
## [121] lubridate_1.7.10     base64enc_0.1-3